package com.yc.projectDemo;

import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.itextpdf.kernel.pdf.*;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Paragraph;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Exports the news list of a locally saved HTML page to a PDF file.
 *
 * <p>The program parses the saved page with jsoup, reads every {@code .list > ul > li}
 * entry that contains both an {@code <a>} and a {@code <span>}, echoes each entry to
 * standard output, and writes the entries to {@link #OUTPUT_PDF} using the font at
 * {@link #FONT_PATH} so that Chinese text can be rendered.
 */
public class NewsScraper {

    /** Locally saved HTML page that is used as input. */
    private static final String INPUT_HTML = "/Users/samuelchen/Desktop/fujian.html";

    /** Path of the generated PDF file. */
    private static final String OUTPUT_PDF = "news.pdf";

    /** Font able to render Chinese text. */
    private static final String FONT_PATH = "/fonts/NotoSansCJKsc-Regular.otf";

    /** Prefix put in front of relative links found in the page. */
    private static final String SITE_ROOT = "http://www.nhc.gov.cn/";

    /** CSS selector matching one news entry. */
    private static final String NEWS_ITEM_SELECTOR = ".list > ul > li";

    /** One news entry extracted from the page. */
    private static final class NewsItem {
        private final String title;
        private final String href;
        private final String date;

        NewsItem(String title, String href, String date) {
            this.title = title;
            this.href = href;
            this.date = date;
        }
    }

    /**
     * Program entry point: parses the input page and writes the PDF.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            org.jsoup.nodes.Document doc = Jsoup.parse(new File(INPUT_HTML), "UTF-8");
            List<NewsItem> items = extractItems(doc);

            // Closing a PDF document that has no pages fails inside iText, so do not
            // even create the output file when there is nothing to write.
            if (items.isEmpty()) {
                System.err.println("未找到任何新闻条目，未生成 PDF: " + INPUT_HTML);
                return;
            }

            writePdf(items, OUTPUT_PDF);
            System.out.println("新闻数据已成功导出到: " + OUTPUT_PDF);
        } catch (IOException e) {
            System.err.println("导出失败: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Collects all news entries of the page and echoes them to standard output.
     *
     * @param doc parsed HTML page
     * @return the entries that have both a link and a date element, in page order
     */
    private static List<NewsItem> extractItems(org.jsoup.nodes.Document doc) {
        List<NewsItem> items = new ArrayList<>();
        Elements newsList = doc.select(NEWS_ITEM_SELECTOR);
        for (Element entry : newsList) {
            Element titleTag = entry.selectFirst("a");
            Element dateTag = entry.selectFirst("span");
            if (titleTag == null || dateTag == null) {
                // Entries without a link or a date are skipped.
                continue;
            }

            String title = titleTag.text().trim();
            String href = titleTag.attr("href").trim();
            String date = dateTag.text().trim();

            System.out.println(title);
            System.out.println(href);
            System.out.println(date);

            items.add(new NewsItem(title, href, date));
        }
        return items;
    }

    /**
     * Writes the given entries to a PDF file.
     *
     * @param items entries to write; must not be empty
     * @param pdfPath path of the PDF file to create
     * @throws IOException if the font cannot be loaded or the file cannot be written
     */
    private static void writePdf(List<NewsItem> items, String pdfPath) throws IOException {
        // Load the font first so that a missing font fails before the output file is created.
        PdfFont font = PdfFontFactory.createFont(FONT_PATH, PdfEncodings.IDENTITY_H, true);

        // try-with-resources releases the file handle even when writing fails midway.
        try (PdfWriter writer = new PdfWriter(new FileOutputStream(pdfPath));
             PdfDocument pdf = new PdfDocument(writer);
             Document document = new Document(pdf)) {
            document.setFont(font);
            for (NewsItem item : items) {
                document.add(new Paragraph("标题: " + item.title));
                document.add(new Paragraph("时间: " + item.date));
                document.add(new Paragraph("链接: " + toAbsoluteLink(item.href)));
                document.add(new Paragraph("\n"));
            }
        }
    }

    /**
     * Turns an {@code href} value into an absolute link.
     *
     * <p>Absolute links are returned unchanged, protocol-relative links get the
     * {@code http:} scheme, and every other value is appended to {@link #SITE_ROOT}
     * without producing a double slash.
     *
     * @param href trimmed {@code href} attribute value
     * @return the absolute link
     */
    private static String toAbsoluteLink(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        if (href.startsWith("//")) {
            return "http:" + href;
        }
        // SITE_ROOT already ends with '/', so drop leading slashes of the path.
        int start = 0;
        while (start < href.length() && href.charAt(start) == '/') {
            start++;
        }
        return SITE_ROOT + href.substring(start);
    }
}
